Assignment 13


In [33]:
# Import packages
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from mpl_toolkits.mplot3d import Axes3D
import scipy.stats as ss
import pandas as pd
import numpy as np
import seaborn as sns
from scipy.optimize import curve_fit
from pylab import *
import itertools
from lmfit import Model

np.random.seed(12345678)  # for reproducibility, set random seed

# Read in data
df = pd.read_csv('../output.csv')

nvox = 64*64*48 # assume number of voxels per bin
df['weighted'] = df['synapses']/df['unmasked']*nvox

xvals = df['cx'].unique()
yvals = df['cy'].unique()
zvals = df['cz'].unique()

# Get rid of the blank edges
bottom = 0;
top = len(xvals);
right = 0;
left = len(yvals);
for z in zvals:
    this_z = df[df['cz']==z]
    
    # X direction
    xhist, bin_edges = np.histogram(this_z['cx'], weights = this_z['unmasked']/(nvox*len(yvals)), bins=len(xvals))
    
    bottom = max(bottom, np.argmax(xhist>0.5))
    top = min(top, len(xvals)-np.argmax(xhist[::-1]>0.5))
    
    # Y direction
    yhist, bin_edges = np.histogram(this_z['cy'], weights = this_z['unmasked']/(nvox*len(xvals)), bins=len(yvals))
    
    right = max(right, np.argmax(yhist>0.5))
    left = min(left, len(yvals)-np.argmax(yhist[::-1]>0.5))

# Copy new dataset without edges
df2 = df.copy()
for z in zvals:
    df2.drop(df2.index[(df2['cx']<xvals[bottom]) | (df2['cx']>=xvals[top])], inplace=True)
    df2.drop(df2.index[(df2['cy']<yvals[right]) | (df2['cy']>=yvals[left])], inplace=True)

xvals = df2['cx'].unique()
yvals = df2['cy'].unique()
zvals = df2['cz'].unique()

df2.head()


Out[33]:
cx cy cz unmasked synapses weighted
6292 448 1369 55 126357 153 238.063772
6293 448 1369 166 139932 207 290.840237
6294 448 1369 277 150269 194 253.824488
6295 448 1369 388 138071 159 226.410122
6296 448 1369 499 150842 258 336.278119

Fit 2D data to X-Y coordinate sums


In [2]:
dfxy = pd.pivot_table(df2, index='cy', columns='cx', values='weighted', aggfunc=np.sum)

df3 = dfxy.unstack().rename('weighted').reset_index()
df3.head()


Out[2]:
cx cy weighted
0 448 1369 2753.537582
1 448 1408 2458.095787
2 448 1447 2852.294254
3 448 1486 2656.401842
4 448 1525 3079.484039

In [3]:
from numpy.polynomial import polynomial as P

def polyfit2d(x, y, f, deg):
    x = np.asarray(x)
    y = np.asarray(y)
    f = np.asarray(f)
    deg = np.asarray(deg)
    vander = P.polyvander2d(x, y, deg)
    vander = vander.reshape((-1,vander.shape[-1]))
    f = f.reshape((vander.shape[0],))
    c = np.linalg.lstsq(vander, f)[0]
    return c.reshape(deg+1)

x_point = np.array(df3['cx'])
y_point = np.array(df3['cy'])
synapses = np.array(df3['weighted'])

for deg in range(5):
    popt = polyfit2d(x_point, y_point, synapses, deg=[deg, deg])
    chi_squared = np.sum((P.polyval2d(x_point, y_point, popt) - synapses) ** 2)
    reduced_chi_squared = chi_squared / (len(x_point) + len(y_point) - len(popt))
    print 'Testing polynomial model of degree =', deg
    print 'The degrees of freedom for this test is', len(x_point) + len(y_point) - len(popt)
    print 'The chi squared value is: ', ("%.2f" % chi_squared)
    print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
    print 

popt = polyfit2d(np.log(x_point), np.log(y_point), synapses, deg=[1, 1])
chi_squared = np.sum((P.polyval2d(np.log(x_point), np.log(y_point), popt) - synapses) ** 2)
reduced_chi_squared = chi_squared / (len(x_point) + len(y_point) - len(popt))
print 'Testing logarithmic model'
print 'The degrees of freedom for this test is', len(x_point) + len(y_point) - len(popt)
print 'The chi squared value is: ', ("%.2f" % chi_squared)
print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
print 

popt = polyfit2d(np.log(x_point), np.log(y_point), np.log(synapses), deg=[1, 1])
chi_squared = np.sum((np.exp(P.polyval2d(np.log(x_point), np.log(y_point), popt)) - synapses) ** 2)
reduced_chi_squared = chi_squared / (len(x_point) + len(y_point) - len(popt))
print 'Testing powerlaw model'
print 'The degrees of freedom for this test is', len(x_point) + len(y_point) - len(popt)
print 'The chi squared value is: ', ("%.2f" % chi_squared)
print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
print 

print '2D polynomial of degree = 2 gives the best fit'


Testing polynomial model of degree = 0
The degrees of freedom for this test is 6383
The chi squared value is:  496469994.38
The reduced chi squared value is:  77780.04

Testing polynomial model of degree = 1
The degrees of freedom for this test is 6382
The chi squared value is:  369231539.88
The reduced chi squared value is:  57855.15

Testing polynomial model of degree = 2
The degrees of freedom for this test is 6381
The chi squared value is:  317215775.16
The reduced chi squared value is:  49712.55

Testing polynomial model of degree = 3
The degrees of freedom for this test is 6380
The chi squared value is:  352160304.73
The reduced chi squared value is:  55197.54

Testing polynomial model of degree = 4
The degrees of freedom for this test is 6379
The chi squared value is:  1268526259.88
The reduced chi squared value is:  198859.74

Testing logarithmic model
The degrees of freedom for this test is 6382
The chi squared value is:  386588834.21
The reduced chi squared value is:  60574.87

Testing powerlaw model
The degrees of freedom for this test is 6382
The chi squared value is:  391450994.31
The reduced chi squared value is:  61336.73

2D polynomial of degree = 2 gives the best fit

Plot the fitted and actual number of synapses distribution


In [4]:
popt = polyfit2d(x_point, y_point, synapses, deg=[2, 2])
pred = P.polyval2d(x_point, y_point, popt)
df3['pred'] = pred

sns.set_style('white')

fs = 18
tfs = 14

ax = plt.figure(figsize = (12, 8)).gca(projection = '3d')
patches = ax.scatter(xs = df3['cx'], ys = df3['cy'], zs = df3['weighted'], 
                     s = df3['weighted']/100, c = df3['weighted'], cmap = 'winter',
                     edgecolors = [0,0,0,0])

plt.xticks(xvals[::20], fontsize=tfs)
plt.yticks(yvals[::10], fontsize=tfs)
#plt.zticks(df2['weighted'].unique()[0, 1000, 2000, 3000], fontsize=tfs)
#ax.zaxis.set_ticks(df2['weighted'].unique()[::3])
ax.view_init(azim = 35)
plt.tick_params(axis='both', which='major', labelsize=tfs)
plt.xlabel('X-coordinate', fontsize = fs, labelpad=20)
plt.ylabel('Y-coordinate', fontsize = fs, labelpad=20)
plt.gca().set_zlabel('Number of Synapses', fontsize = fs, labelpad=20)
plt.title("Actual Distribution of Synapses in X-Y plane", fontsize=fs)
plt.colorbar(patches)
plt.tight_layout()
plt.show()
    
X, Y = np.meshgrid(xvals, yvals)
Z = pd.pivot_table(df3, index='cy', columns='cx', values='pred')

ax = plt.figure(figsize = (12, 8)).gca(projection = '3d')
patches = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap = 'winter',
                          linewidth=0, antialiased=False)

plt.xticks(xvals[::20], fontsize=tfs)
plt.yticks(yvals[::10], fontsize=tfs)
#plt.zticks(df2['weighted'].unique()[0, 1000, 2000, 3000], fontsize=tfs)
#ax.zaxis.set_ticks(df2['weighted'].unique()[::3])
ax.view_init(azim = 35)
plt.tick_params(axis='both', which='major', labelsize=tfs)
plt.xlabel('X-coordinate', fontsize = fs, labelpad=20)
plt.ylabel('Y-coordinate', fontsize = fs, labelpad=20)
plt.gca().set_zlabel('Number of Synapses', fontsize = fs, labelpad=20)
plt.title("Fitted Distribution of Synapses in X-Y plane", fontsize=fs)
plt.colorbar(patches)
plt.tight_layout()
plt.show()

ax = plt.figure(figsize = (12, 8)).gca(projection = '3d')
patches = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap = 'winter',
                          linewidth=0, antialiased=False)
patches = ax.scatter(xs = df3['cx'], ys = df3['cy'], zs = df3['weighted'], 
                     s = df3['weighted']/100, c = df3['weighted'], cmap = 'winter',
                     edgecolors = [0,0,0,0])

plt.xticks(xvals[::20], fontsize=tfs)
plt.yticks(yvals[::10], fontsize=tfs)
#plt.zticks(df2['weighted'].unique()[0, 1000, 2000, 3000], fontsize=tfs)
#ax.zaxis.set_ticks(df2['weighted'].unique()[::3])
ax.view_init(azim = 35)
plt.tick_params(axis='both', which='major', labelsize=tfs)
plt.xlabel('X-coordinate', fontsize = fs, labelpad=20)
plt.ylabel('Y-coordinate', fontsize = fs, labelpad=20)
plt.gca().set_zlabel('Number of Synapses', fontsize = fs, labelpad=20)
plt.title("Actual and Fitted Distribution of Synapses in X-Y plane", fontsize=fs)
plt.colorbar(patches)
plt.tight_layout()
plt.show()



In [5]:
residual = abs(pred - df3['weighted'])

sns.set_style('white')

fs = 18
tfs = 14

ax = plt.figure(figsize = (12, 8)).gca(projection = '3d')
patches = ax.scatter(xs = df3['cx'], ys = df3['cy'], zs = residual, 
                     s = residual/10, c = residual, cmap = 'winter',
                     edgecolors = [0,0,0,0])

plt.xticks(xvals[::20], fontsize=tfs)
plt.yticks(yvals[::10], fontsize=tfs)
#plt.zticks(dfout['weighted'].unique()[0, 1000, 2000, 3000], fontsize=tfs)
#ax.zaxis.set_ticks(dfout['weighted'].unique()[::3])
ax.view_init(azim = 45)
plt.tick_params(axis='both', which='major', labelsize=tfs)
plt.xlabel('X-coordinate', fontsize = fs, labelpad=20)
plt.ylabel('Y-coordinate', fontsize = fs, labelpad=20)
plt.gca().set_zlabel('Number of Synapses', fontsize = fs, labelpad=20)
plt.title("Difference of Actual and Fitted Number of Synapses", fontsize=fs)
plt.colorbar(patches)
plt.tight_layout()
plt.show()


Aligning Y-layers by means


In [6]:
mean = df2['weighted'].mean()
ymeans = [df2[df2['cy']==y]['weighted'].mean() for y in yvals]

df4 = df2.copy()
for y, ymean in zip(yvals, ymeans):
    df4.loc[df4['cy']==y, 'weighted'] = df4.loc[df4['cy']==y, 'weighted'] - (ymean - mean)

df4.head()


Out[6]:
cx cy cz unmasked synapses weighted
6292 448 1369 55 126357 153 223.086287
6293 448 1369 166 139932 207 275.862752
6294 448 1369 277 150269 194 238.847003
6295 448 1369 388 138071 159 211.432637
6296 448 1369 499 150842 258 321.300634

In [7]:
fs = 18

bottom = df4['weighted'].min()
top = df4['weighted'].max()

sns.set_style('white')

avals_list = [xvals, yvals, zvals]
anames = ['cx', 'cy', 'cz']
alabels = ['X (um)', 'Y (um)', 'Z (um)']
ascales = [2, 2, 1]

for avals, aname, alabel, ascale in zip(avals_list, anames, alabels, ascales):
    delta = 2*(avals[1]-avals[0])
    extra = delta / (np.sqrt(3)/2)
    left = np.min(avals) - extra
    right = np.max(avals) + extra
    
    with sns.plotting_context('notebook', font_scale=1.5):
        g = sns.jointplot(x=aname, y='weighted', data = df4, kind='hex',
                          joint_kws={'gridsize':len(avals)/ascale+1, 'extent':(left, right, bottom, top)},
                          marginal_kws={'bins':len(avals)/ascale})
        g = g.plot_joint(sns.regplot, scatter=False, color='pink', line_kws={'linewidth': 4})
        sns.axlabel(alabel, 'Weighted synapses', fontsize=fs)


Fit 2D data to Y-aligned data


In [8]:
dfxy = pd.pivot_table(df4, index='cy', columns='cx', values='weighted', aggfunc=np.sum)

df5 = dfxy.unstack().rename('weighted').reset_index()
df5.head()


Out[8]:
cx cy weighted
0 448 1369 2588.785248
1 448 1408 2343.006049
2 448 1447 2759.237726
3 448 1486 2519.679015
4 448 1525 2931.327981

Test different 2D models


In [9]:
from numpy.polynomial import polynomial as P

def polyfit2d(x, y, f, deg):
    x = np.asarray(x)
    y = np.asarray(y)
    f = np.asarray(f)
    deg = np.asarray(deg)
    vander = P.polyvander2d(x, y, deg)
    vander = vander.reshape((-1,vander.shape[-1]))
    f = f.reshape((vander.shape[0],))
    c = np.linalg.lstsq(vander, f)[0]
    return c.reshape(deg+1)

x_point = np.array(df5['cx'])
y_point = np.array(df5['cy'])
synapses = np.array(df5['weighted'])

for deg in range(5):
    popt = polyfit2d(x_point, y_point, synapses, deg=[deg, deg])
    chi_squared = np.sum((P.polyval2d(x_point, y_point, popt) - synapses) ** 2)
    reduced_chi_squared = chi_squared / (len(x_point) + len(y_point) - len(popt))
    print 'Testing polynomial model of degree =', deg
    print 'The degrees of freedom for this test is', len(x_point) + len(y_point) - len(popt)
    print 'The chi squared value is: ', ("%.2f" % chi_squared)
    print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
    print 

popt = polyfit2d(np.log(x_point), np.log(y_point), synapses, deg=[1, 1])
chi_squared = np.sum((P.polyval2d(np.log(x_point), np.log(y_point), popt) - synapses) ** 2)
reduced_chi_squared = chi_squared / (len(x_point) + len(y_point) - len(popt))
print 'Testing logarithmic model'
print 'The degrees of freedom for this test is', len(x_point) + len(y_point) - len(popt)
print 'The chi squared value is: ', ("%.2f" % chi_squared)
print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
print 

popt = polyfit2d(np.log(x_point), np.log(y_point), np.log(synapses), deg=[1, 1])
chi_squared = np.sum((np.exp(P.polyval2d(np.log(x_point), np.log(y_point), popt)) - synapses) ** 2)
reduced_chi_squared = chi_squared / (len(x_point) + len(y_point) - len(popt))
print 'Testing powerlaw model'
print 'The degrees of freedom for this test is', len(x_point) + len(y_point) - len(popt)
print 'The chi squared value is: ', ("%.2f" % chi_squared)
print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
print 

print '2D polynomial of degree = 2 gives the best fit'


Testing polynomial model of degree = 0
The degrees of freedom for this test is 6383
The chi squared value is:  359140617.31
The reduced chi squared value is:  56265.18

Testing polynomial model of degree = 1
The degrees of freedom for this test is 6382
The chi squared value is:  341398566.23
The reduced chi squared value is:  53493.98

Testing polynomial model of degree = 2
The degrees of freedom for this test is 6381
The chi squared value is:  304238990.09
The reduced chi squared value is:  47678.89

Testing polynomial model of degree = 3
The degrees of freedom for this test is 6380
The chi squared value is:  339413567.81
The reduced chi squared value is:  53199.62

Testing polynomial model of degree = 4
The degrees of freedom for this test is 6379
The chi squared value is:  1107177349.69
The reduced chi squared value is:  173565.97

Testing logarithmic model
The degrees of freedom for this test is 6382
The chi squared value is:  350037567.44
The reduced chi squared value is:  54847.63

Testing powerlaw model
The degrees of freedom for this test is 6382
The chi squared value is:  351360422.51
The reduced chi squared value is:  55054.91

2D polynomial of degree = 2 gives the best fit

Plot the fitted and actual number of synapses distribution


In [10]:
popt = polyfit2d(x_point, y_point, synapses, deg=[2, 2])
pred = P.polyval2d(x_point, y_point, popt)
df5['pred'] = pred

sns.set_style('white')

fs = 18
tfs = 14

ax = plt.figure(figsize = (12, 8)).gca(projection = '3d')
patches = ax.scatter(xs = df5['cx'], ys = df5['cy'], zs = df5['weighted'], 
                     s = df5['weighted']/100, c = df5['weighted'], cmap = 'winter',
                     edgecolors = [0,0,0,0])

plt.xticks(xvals[::20], fontsize=tfs)
plt.yticks(yvals[::10], fontsize=tfs)
#plt.zticks(df2['weighted'].unique()[0, 1000, 2000, 3000], fontsize=tfs)
#ax.zaxis.set_ticks(df2['weighted'].unique()[::3])
ax.view_init(azim = 35)
plt.tick_params(axis='both', which='major', labelsize=tfs)
plt.xlabel('X-coordinate', fontsize = fs, labelpad=20)
plt.ylabel('Y-coordinate', fontsize = fs, labelpad=20)
plt.gca().set_zlabel('Number of Synapses', fontsize = fs, labelpad=20)
plt.title("Actual Distribution of Synapses in X-Y plane", fontsize=fs)
plt.colorbar(patches)
plt.tight_layout()
plt.show()
    
X, Y = np.meshgrid(xvals, yvals)
Z = pd.pivot_table(df5, index='cy', columns='cx', values='pred')

ax = plt.figure(figsize = (12, 8)).gca(projection = '3d')
patches = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap = 'winter',
                          linewidth=0, antialiased=False)

plt.xticks(xvals[::20], fontsize=tfs)
plt.yticks(yvals[::10], fontsize=tfs)
#plt.zticks(df2['weighted'].unique()[0, 1000, 2000, 3000], fontsize=tfs)
#ax.zaxis.set_ticks(df2['weighted'].unique()[::3])
ax.view_init(azim = 35)
plt.tick_params(axis='both', which='major', labelsize=tfs)
plt.xlabel('X-coordinate', fontsize = fs, labelpad=20)
plt.ylabel('Y-coordinate', fontsize = fs, labelpad=20)
plt.gca().set_zlabel('Number of Synapses', fontsize = fs, labelpad=20)
plt.title("Fitted Distribution of Synapses in X-Y plane", fontsize=fs)
plt.colorbar(patches)
plt.tight_layout()
plt.show()

ax = plt.figure(figsize = (12, 8)).gca(projection = '3d')
patches = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap = 'winter',
                          linewidth=0, antialiased=False)
patches = ax.scatter(xs = df5['cx'], ys = df5['cy'], zs = df5['weighted'], 
                     s = df5['weighted']/100, c = df5['weighted'], cmap = 'winter',
                     edgecolors = [0,0,0,0])

plt.xticks(xvals[::20], fontsize=tfs)
plt.yticks(yvals[::10], fontsize=tfs)
#plt.zticks(df2['weighted'].unique()[0, 1000, 2000, 3000], fontsize=tfs)
#ax.zaxis.set_ticks(df2['weighted'].unique()[::3])
ax.view_init(azim = 35)
plt.tick_params(axis='both', which='major', labelsize=tfs)
plt.xlabel('X-coordinate', fontsize = fs, labelpad=20)
plt.ylabel('Y-coordinate', fontsize = fs, labelpad=20)
plt.gca().set_zlabel('Number of Synapses', fontsize = fs, labelpad=20)
plt.title("Actual and Fitted Distribution of Synapses in X-Y plane", fontsize=fs)
plt.colorbar(patches)
plt.tight_layout()
plt.show()



In [11]:
residual = abs(pred - df5['weighted'])

sns.set_style('white')

fs = 18
tfs = 14

ax = plt.figure(figsize = (12, 8)).gca(projection = '3d')
patches = ax.scatter(xs = df5['cx'], ys = df5['cy'], zs = residual, 
                     s = residual/10, c = residual, cmap = 'winter',
                     edgecolors = [0,0,0,0])

plt.xticks(xvals[::20], fontsize=tfs)
plt.yticks(yvals[::10], fontsize=tfs)
#plt.zticks(dfout['weighted'].unique()[0, 1000, 2000, 3000], fontsize=tfs)
#ax.zaxis.set_ticks(dfout['weighted'].unique()[::3])
ax.view_init(azim = 45)
plt.tick_params(axis='both', which='major', labelsize=tfs)
plt.xlabel('X-coordinate', fontsize = fs, labelpad=20)
plt.ylabel('Y-coordinate', fontsize = fs, labelpad=20)
plt.gca().set_zlabel('Number of Synapses', fontsize = fs, labelpad=20)
plt.title("Difference of Actual and Fitted Number of Synapses", fontsize=fs)
plt.colorbar(patches)
plt.tight_layout()
plt.show()


3D clustering by synapse density


In [10]:
X = df2.loc[:,('cx','cy','cz')]
Xnew = X.copy()
for i, x in enumerate(xvals):
    Xnew.loc[Xnew['cx']==x, 'cx'] = i
for j, y in enumerate(yvals):
    Xnew.loc[Xnew['cy']==y, 'cy'] = j
for k, z in enumerate(zvals):
    Xnew.loc[Xnew['cz']==z, 'cz'] = k
Xnew.columns = ['i', 'j', 'k']
Xnew.head()


Out[10]:
i j k
6292 0 0 0
6293 0 0 1
6294 0 0 2
6295 0 0 3
6296 0 0 4

In [29]:
from sklearn.cluster import DBSCAN

# Compute DBSCAN
db = DBSCAN(eps=1, min_samples=2000).fit(Xnew, sample_weight=df2['weighted'])
core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
labels = db.labels_

# Number of clusters in labels, ignoring noise if present.
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)

print('Estimated number of clusters: %d' % n_clusters_)


Estimated number of clusters: 142

In [47]:
# Plot result

sns.set_style('white')

fs = 18
tfs = 14

# Black removed and is used for noise instead.
ax = plt.figure(figsize = (12, 8)).gca(projection = '3d')
unique_labels = set(labels)
colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))

for k, col in zip(unique_labels, colors):
    if k == -1:
        # Black used for noise.
        col = 'k'

    class_member_mask = (labels == k)

    xyz = X[class_member_mask & core_samples_mask]
    ax.scatter(xyz['cx'], xyz['cy'], xyz['cz'], c=col)

plt.xticks(xvals[::20], fontsize=tfs)
plt.yticks(yvals[::10], fontsize=tfs)
#plt.zticks(dfout['weighted'].unique()[0, 1000, 2000, 3000], fontsize=tfs)
#ax.zaxis.set_ticks(dfout['weighted'].unique()[::3])
ax.view_init(azim = 45)
plt.tick_params(axis='both', which='major', labelsize=tfs)
plt.xlabel('X-coordinate', fontsize = fs, labelpad=20)
plt.ylabel('Y-coordinate', fontsize = fs, labelpad=20)
plt.gca().set_zlabel('Number of Synapses', fontsize = fs, labelpad=20)
plt.title('Estimated number of clusters: %d' % n_clusters_, fontsize=fs)
# plt.colorbar(patches)
plt.tight_layout()
plt.show()



In [60]:
# Plot result

sns.set_style('white')

fs = 18
tfs = 14

# Black removed and is used for noise instead.
ax = plt.figure(figsize = (12, 8)).gca(projection = '3d')
unique_labels = set(labels)
colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))

print "Cluster sizes:"
for k, col in zip(unique_labels, colors):
    if k == -1:
        # Black used for noise.
        col = 'k'

    class_member_mask = (labels == k)

    xyz = X[class_member_mask & core_samples_mask]
    if len(xyz)>1000:
        print "group", k, ":", np.sum(class_member_mask)
        ax.scatter(xyz['cx'], xyz['cy'], xyz['cz'], c=col)
        
        xyz = X[class_member_mask & ~core_samples_mask]
        ax.scatter(xyz['cx'], xyz['cy'], xyz['cz'], c=col, s=6)
    
plt.xticks(xvals[::20], fontsize=tfs)
plt.yticks(yvals[::10], fontsize=tfs)
#plt.zticks(dfout['weighted'].unique()[0, 1000, 2000, 3000], fontsize=tfs)
#ax.zaxis.set_ticks(dfout['weighted'].unique()[::3])
ax.view_init(azim = 45)
plt.tick_params(axis='both', which='major', labelsize=tfs)
plt.xlabel('X-coordinate', fontsize = fs, labelpad=20)
plt.ylabel('Y-coordinate', fontsize = fs, labelpad=20)
plt.gca().set_zlabel('Number of Synapses', fontsize = fs, labelpad=20)
plt.title('Large clusters', fontsize=fs)
# plt.colorbar(patches)
plt.tight_layout()
plt.show()


Cluster sizes:
group 0 : 5599
group 83 : 6074

In [69]:
fs = 18
tfs = 14

df2['label']  = labels

for k in [0, 83]:
    plt.figure(figsize=(16,8))
    print "Group", k
    
    for zi, z in enumerate(zvals):
        XY = pd.pivot_table(df2[df2['cz']==z], index='cy', columns='cx', values='weighted')
        XYlabel = pd.pivot_table(df2[df2['cz']==z], index='cy', columns='cx', values='label')
        XYmask = XYlabel != k
        
        plt.subplot(3,4,1+zi)
        sns.heatmap(XY, xticklabels=40, yticklabels=30, 
                    vmin=df2['weighted'].min(), vmax=df2['weighted'].max(), cbar=False, cmap=cm.get_cmap('winter'),
                    mask=XYmask)
        plt.gca().set_title('Z = '+str(z)+' um', fontsize=fs)
        plt.gca().set_xlabel('X (um)', fontsize=fs)
        plt.gca().set_ylabel('Y (um)', fontsize=fs)
        plt.tick_params(axis='both', which='major', labelsize=tfs)

    plt.tight_layout()
    plt.show()


Group 0
Group 83